***User should have four .dta files ready, including:
*Core file from Waves 13-16 (wave13-16_FULL.dta)
*License file from the Topical module in Wave 13 (Module.dta)
*Data defining ban and non-ban licensing (mandatory_unique.dta)
*Wave 2 file that contains the history of advanced courses taken (Wave2_topical.dta)


***Lines 10 to 327 are all about data cleaning
***User should start with the core file and run the following code
* Set the maximum matrix size before anything else is run
set matsize 1500

* Load the core file (discarding whatever is in memory) and keep wave 13 only
* (an extra restriction, "& srefmon==4", is commented out and therefore not applied)
use "wave13-16_FULL.dta", clear
keep if swave==13

***** Attach the license module; the merge result is stored in _licensemerge
merge m:1 lgtkey using "Module.dta", generate(_licensemerge)

***** Attach the three course variables from the Wave 2 file;
***** rows that exist only in the Wave 2 file are discarded
merge m:1 lgtkey using "Wave2_topical", keepusing(ecourse1 ecourse2 ecourse3) generate(edumerge)
drop if edumerge==2

* Course indicators: 1 when the matching ecourse variable equals 1, 0 otherwise
* (a missing ecourse value yields 0)
gen math = (ecourse1==1)
gen sci  = (ecourse2==1)
gen eng  = (ecourse3==1)
******************************** Data Management ***********************
* Keep ages 18 through 64; observations with a missing tage are removed too
keep if inrange(tage,18,64)

* Remove observations that fail any of the following:
*   - work hours below 1                (ejbhrs1 < 1)
*   - unusable license data             (iprocert < 1)
*   - imputed license, per the original comment (iwhopcer < 0)
drop if ejbhrs1<1 | iprocert<1 | iwhopcer<0

* Define demographics
* female is 1 unless esex==1; only female==0 observations are retained
generate female = (esex!=1)
keep if female==0

generate black = (erace==2)

* hispanic is set only for non-black observations with eorigin==1
generate hispanic = (eorigin==1 & black==0)

* Create hourly wage as tpmsum1 / 4 / ejbhrs1
* (presumably monthly earnings spread over 4 weeks, divided by weekly hours --
*  confirm against the codebook).
* The variable is stored as double, not an integer type such as long: an integer
* storage type would discard the fractional part of every hourly wage before the
* range filters and the log below are applied.
generate double wage = (tpmsum1)/4/ejbhrs1

* Trim the wage distribution to [5,100]; a missing wage is removed by the first
* drop because Stata treats missing as larger than any number
drop if wage>100
drop if wage<5
gen lwage =log(wage)

* Union indicator: 1 unless eunion1==2
generate union=1
replace union=0 if eunion1==2

*Delete imputed union data (negative eunion1, per the original comment)
drop if eunion1 <0 

* Flag races other than white and black: erace above 2
* (a missing erace also satisfies erace>2 and is flagged)
gen other = (erace>2)

*** Merging with Module data of license (skip)
*** Create license dummy (skip)

* Delete imputed licensure source (negative iwhopcer)
drop if iwhopcer<0


* Government dummy: eclwrk1 between 3 and 5 inclusive
generate govt = inrange(eclwrk1,3,5)

* Service-worker dummy: ejbind1 between 6870 and 9290 inclusive
gen service = inrange(ejbind1,6870,9290)

* Education dummies built from eeducate
* (postgrad uses eeducate>44, which a missing eeducate also satisfies)
generate hs          = (eeducate==39)
generate somecollege = inrange(eeducate,40,43)
generate college     = (eeducate==44)
generate postgrad    = (eeducate>44)

* Self-employment dummy: 0 when ebuscntr is negative, 1 otherwise
generate self_emp = !(ebuscntr<0)

* Age and its square; tage is renamed to age
rename tage age
gen age2 = age*age

*Ensure that the license variable does not contain certified
replace license=0 if cert==1

*Ensure the license indicator includes only licenses required by the job
* licensenorequire flags license holders whose irjpcert is not 1; it is built
* before license itself is reset on the next line
gen licensenorequire=0
replace licensenorequire=1 if irjpcert!=1 & license==1
* NOTE(review): irjpcert>1 is also true when irjpcert is missing, so those rows
* are set to license=0 as well -- confirm this is intended
replace license=0 if irjpcert>1


************** Redefine licensing by 50/50 rule
* licensecheck is the mean of license within each tfipsst x tjbocc1 cell;
* every observation in a cell then receives the cell's majority status
egen licensecheck=mean(license), by(tfipsst tjbocc1)
replace license=0 if licensecheck<0.5
* Guard against missing: Stata treats missing as larger than any number, so an
* unguarded licensecheck>=0.5 would set license=1 in cells where no license
* value was observed at all
replace license=1 if licensecheck>=0.5 & !missing(licensecheck)


* Merge with ban data
* The "tfipsst-tjbocc1" key is built only where license==1; every other
* observation gets an empty key
gen state_occ = string(tfipsst) + "-" + string(tjbocc1) if license==1

merge m:1 state_occ using "mandatory_unique.dta", generate(_felonymerge)

* Discard rows that exist only in the ban file, and set both ban measures to 0
* for observations that found no match
keep if _felonymerge!=2
foreach v of varlist permban felony {
	replace `v' = 0 if _felonymerge==1
}

*** "permban" only contains permanent ban
*** "felony" contains both temporary and permanent ban
rename permban Ban
label var Ban "Ban"

* Interactions of license and Ban with black
gen license_black = license*black
label var license_black "license*black"
gen Ban_black = Ban*black
label var Ban_black "Ban*black"

* Keep observations with apmsum1==0 and an hourly wage between 5 and 100 inclusive
keep if inrange(wage,5,100) & apmsum1==0


* person is 1 when iwhypcer==2, 0 otherwise
gen person = (iwhypcer==2)


***************** Race selection
* Drop the "other" race group and non-black Hispanic observations
drop if other==1 | (hispanic==1 & black==0)


******************************************
*********** Balancing test *******
******************************************

* Control set shared by every balancing regression.
* self_emp is written out in full so the list does not rely on Stata's
* variable-name abbreviation (which fails under "set varabbrev off" or once
* another variable starting with self_em exists).
local controls  age hs somecollege college postgrad union govt self_emp service eng math sci person 

* The wage regression is run quietly; it is used here only to define the estimation sample
quietly regress lwage black license cert licensenorequire  `controls' i.tfipsst i.tjbocc1 i.rhcalmn if apmsum1==0 & inrange(wage,5,100) [pweight=wpfinwgt], vce(cluster tfipsst)

* Freeze that sample in a variable. e(sample) is overwritten by every later
* regress, so conditioning on it directly would make each regression depend on
* the one that happened to run just before it.
capture drop balance_sample
generate byte balance_sample = e(sample)

* For each outcome: first without, then with occupation (tjbocc1) fixed effects.
* The second specification is exported twice, the second time with stats(beta).
foreach y in license Ban{
regress `y' black  `controls' i.tfipsst i.rhcalmn if balance_sample==1, vce(cluster tfipsst)
outreg2 using balancing.tex, keep(black  `controls') ctitle("`y'") 
regress `y' black  `controls' i.tfipsst i.tjbocc1 i.rhcalmn if balance_sample==1, vce(cluster tfipsst)
outreg2 using balancing.tex, keep(black  `controls') addtext(Occupation FE,X) ctitle("`y'")
outreg2 using balancing.tex, keep(black  `controls') addtext(Occupation FE,X) ctitle("`y'") stats(beta)
}

* Same two Ban specifications, restricted to license==1 observations
regress Ban black  `controls' i.tfipsst i.rhcalmn if balance_sample==1 & license==1, vce(cluster tfipsst)
outreg2 using balancing.tex, keep(black  `controls')  
regress Ban black  `controls' i.tfipsst i.tjbocc1 i.rhcalmn if balance_sample==1 & license==1, vce(cluster tfipsst)
outreg2 using balancing.tex, keep(black  `controls') addtext(Occupation FE,X)  
outreg2 using balancing.tex, keep(black  `controls') addtext(Occupation FE,X) stats(beta)
*/
